from scipy import stats
from math import sqrt
import numpy as np
import matplotlib.pyplot as pltfrom scipy import stats
from math import sqrt
import numpy as np
import matplotlib.pyplot as plt

n = 100
alpha = 0.05
k = np.arange(n+1)
T = stats.binom(n,.5)

R1 = np.arange(40); R1
R2 = n - R1; R2
R = np.concatenate([R1,R2[::-1]])

plt.plot(k,T.pmf(k), "k.", alpha = 0.5)
plt.plot(R,T.pmf(R), "b.", ms=8)
plt.savefig("TH-1-pmf.png")
plt.show()

plt.plot(k,T.cdf(k), "k.", ms=1)
x = np.array([39,61])
plt.plot(x,T.cdf(x), "b.", ms=10)
plt.text(45,0, "$P(T \\leq 39) = F_T(39) \\doteq 0.0176 < \\alpha/2$")
plt.text(65,0.9, "$F_T(60) \\doteq 0.9823 > 1 - \\alpha/2$")
plt.text(65,0.8, "$= P(T \\geq 61)$")
plt.savefig("TH-2-cdf.png")
plt.show()

T.ppf(alpha/2), T.ppf(1-alpha/2)

(40.0, 60.0)

T.cdf(60)

0.9823998998911476

T.cdf(39), T.cdf(40), 1-T.cdf(59), 1-T.cdf(60), T.sf(59), T.sf(60)

(0.017600100108852396,
 0.028443966820490392,
 0.02844396682049044,
 0.01760010010885238,
 0.028443966820490392,
 0.017600100108852396)

print(R)
T.pmf(R).sum()

[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  61  62  63  64  65  66  67  68  69  70  71  72  73  74
  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92
  93  94  95  96  97  98  99 100]

0.035200200217704786

plt.plot(k,T.pmf(k), "k.", alpha=0.5)
plt.plot(R,T.pmf(R), "b.", ms=8)
plt.show()

T.ppf(alpha)

42.0

RR = np.arange(42); RR

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41])

plt.plot(k,T.pmf(k), "k.", alpha=0.5)
plt.plot(RR,T.pmf(RR), "r.", ms=8)
plt.savefig("TH-3-pmf-onesided.png")
plt.show()

RR

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41])

stats.binom.cdf(41,n,0.4), stats.binom.pmf(RR, n, 0.4).sum()

(0.6225326761221707, 0.6225326761221721)

stats.binom.cdf(39,n,0.4)+stats.binom.sf(n-39-1,n,0.4), stats.binom.pmf(R, n, 0.4).sum()

(0.46209338234839553, 0.4620933823483936)

p = np.arange(0,1,.01)

stats.binom.cdf(41, n, 0.5)

0.044313040057033834

plt.plot(p, stats.binom.cdf(39,n,p)+stats.binom.sf(n-39-1,n,p), 'b-', alpha=0.9)
plt.plot(p, stats.binom.cdf(41, n, p), 'r-', alpha=0.9)
plt.grid(True)
plt.legend(['Oboustranný test', 'Jednostranný test'])
plt.title("Síla testu mince v závislosti na skutečném $p$. $\\alpha=.05$")
plt.savefig("TH-4-power1.png")
plt.show()

alpha = 0.1
k = np.arange(n+1)
T = stats.binom(n,.5)
a = T.ppf(alpha/2)
b = T.ppf(alpha)

a, b

(42.0, 44.0)

plt.plot(p, stats.binom.cdf(a-1,n,p)+stats.binom.sf(n-a,n,p), 'b-', alpha=0.9)
plt.plot(p, stats.binom.cdf(b-1, n, p), 'r-', alpha=0.9)
plt.grid(True)
plt.legend(['Oboustranný test', 'Jednostranný test'])
plt.title("Síla testu mince v závislosti na skutečném $p$. $\\alpha=.1$")
plt.savefig("TH-5-power2.png")
plt.show()

n = 1000
alpha = 0.05
k = np.arange(n+1)
T = stats.binom(n,.5)
a = T.ppf(alpha/2)
b = T.ppf(alpha)

a, b

(469.0, 474.0)

plt.plot(p, stats.binom.cdf(a-1,n,p)+stats.binom.sf(n-a,n,p), 'b-', alpha=0.9)
plt.plot(p, stats.binom.cdf(b-1, n, p), 'r-', alpha=0.9)
plt.grid(True)
plt.legend(['Oboustranný test', 'Jednostranný test'])
plt.title("Síla testu mince. $n = 1000$, $\\alpha=.05$")
plt.savefig("TH-6-power3.png")
plt.show()

stats.binomtest(k=35, n=100, p=0.5, alternative='two-sided')

BinomTestResult(k=35, n=100, alternative='two-sided', statistic=0.35, pvalue=0.0035176417229701587)

stats.binomtest(k=40, n=100, p=0.5, alternative='two-sided')

BinomTestResult(k=40, n=100, alternative='two-sided', statistic=0.4, pvalue=0.056887933640980784)

stats.binomtest(k=43, n=100, p=0.5, alternative='two-sided')

BinomTestResult(k=43, n=100, alternative='two-sided', statistic=0.43, pvalue=0.1933479044956428)

stats.binomtest(k=468, n=1000, p=0.5, alternative='two-sided')

BinomTestResult(k=468, n=1000, alternative='two-sided', statistic=0.468, pvalue=0.04629119736049808)

stats.binomtest(k=469, n=1000, p=0.5, alternative='two-sided')

BinomTestResult(k=469, n=1000, alternative='two-sided', statistic=0.469, pvalue=0.053677849645009976)

stats.binomtest(k=469, n=1000, p=0.5, alternative='less')

BinomTestResult(k=469, n=1000, alternative='less', statistic=0.469, pvalue=0.026838924822504988)

stats.binomtest(k=472, n=1000, p=0.5, alternative='two-sided')

BinomTestResult(k=472, n=1000, alternative='two-sided', statistic=0.472, pvalue=0.08193870178881396)

stats.binomtest(k=472, n=1000, p=0.5, alternative='less')

BinomTestResult(k=472, n=1000, alternative='less', statistic=0.472, pvalue=0.04096935089440698)

obs = np.array([92, 120, 88, 98, 95, 107])

stats.chisquare(obs)

Power_divergenceResult(statistic=6.86, pvalue=0.23126224576166762)

stats.power_divergence(obs, lambda_='log-likelihood')

Power_divergenceResult(statistic=6.689778405551933, pvalue=0.24475286293843238)

sum(obs)

600

sum([ (x-100)**2/100 for x in obs])

6.86

sum((obs-100)**2/100)

6.86

1-stats.chi2.cdf(6.86, df=5)

0.23126224576166765

stats.chi2.ppf(0.95, df=5)

11.070497693516351

sum(2*obs*np.log(obs/100))

6.689778405551933

n = 600
X = stats.multinomial(n, [1/6]*6).rvs(size=10_000)
chisq = ((X-n/6)**2/(n/6)).sum(axis=1)
plt.hist(chisq, bins=50, density=True)
plt.plot(q:=np.arange(0,50,.2),stats.chi2.pdf(q, df=5),'r-');

n = 60
X = stats.multinomial(n, [1/6]*6).rvs(size=10_000)
chisq = ((X-n/6)**2/(n/6)).sum(axis=1)
plt.hist(chisq, bins=50, density=True)
plt.plot(q:=np.arange(0,50,.2),stats.chi2.pdf(q, df=5),'r-');

n = 6
X = stats.multinomial(n, [1/6]*6).rvs(size=10_000)
chisq = ((X-n/6)**2/(n/6)).sum(axis=1)
plt.hist(chisq, bins=50, density=True)
plt.plot(q:=np.arange(0,50,.2),stats.chi2.pdf(q, df=5),'r-');

Z = stats.norm.rvs(size=(10_000,5))
plt.hist((Z**2).sum(axis=1), bins=30, density=True)
plt.plot(q:=np.arange(0,50,.2), stats.chi2.pdf(q, df=5),'r-');

q=np.arange(0,20,.2)
plt.plot(q,stats.chi2.pdf(q, df=1),'r-')
plt.plot(q,stats.chi2.pdf(q, df=2),'b-')
plt.plot(q,stats.chi2.pdf(q, df=3),'m-')
plt.plot(q,stats.chi2.pdf(q, df=4),'g-')
plt.legend([f"$\\chi^2_{k}$" for k in [1,2,3,4]]);

Test férovosti mince¶

Jednostranný odhad¶

Spočítáme sílu testu¶

Co když nám vadí nízká síla testu?¶

Zvýšení $\alpha$¶

Zvýšení $n$ je lepší, pokud to jde!¶

A konečně, co když nechceme vše dělat na koleně?¶

chi^2 a G-test¶

Histogram $\chi_5^2 = \sum_{i=1}^6 \frac{(O_i-E_i)^2}{E_i}$¶

Součet pěti druhých mocnin normálního rozdělení¶

Hustota $\chi^2$ s $k$ stupni volnosti¶